require 'pp'
# Append ../lib to $RUBYLIB (creating the variable when it is unset) so that
# mrtoolkit can be found; duplicate entries are dropped.
ENV['RUBYLIB'] = (ENV.fetch('RUBYLIB', '').split(':') + ['../lib']).uniq.join(':')
# Mirror every $RUBYLIB entry into this process's load path, skipping the
# ones that are already present.
ENV['RUBYLIB'].split(':').each do |dir|
  $: << dir unless $:.include?(dir)
end

require 'mrtoolkit'
require 'time'


# Returns the shell glob "out/<base>/part-*", i.e. every part-* file in the
# out/<base> directory. The file tasks below interpolate it into their
# `cat` commands.
#
# base - name of the subdirectory of out/ to look in (interpolated via to_s).
def outfiles(base)
  job_dir = "out/#{base}"
  "#{job_dir}/part-*"
end


######################################################################
# Removes every listed path with `rm -rf`, one shell command per entry, in
# the order given. `rm -rf` does not complain about paths that do not exist,
# so the task can be run repeatedly.
desc "clean up"
task "clean" do
  %w[
    hour hours
    ip ips
    section sections
    top-file top-files
    ip-size ip-sizes
    ip-result ip-results
    ip-uas ip-ua
    out
  ].each do |path|
    system "rm -rf #{path}"
  end
end

######################################################################
# Runs the ./import-logs script from the current directory.
# Uses Rake's `sh` instead of `system`: `sh` raises when the command exits
# non-zero, so a failed import fails the task instead of being silently
# reported as a success.
desc "import data to hdfs"
task "import" do
  sh "./import-logs"
end

######################################################################
desc "traffic by IP address"
file "ips" do
  system "ruby ip.rb"
  system "cat #{outfiles('ip')}|sort -nr -k2 >ips"
end

######################################################################
desc "returned result size by IP address"
file "ip-sizes" do
  system "ruby ip-size.rb"
  system "cat #{outfiles('ip-size')}|sort -nr -k2 >ip-sizes"
end

######################################################################
desc "show all combinations of ip and user agent"
file "ip-uas" do
  system "ruby ip-ua.rb"
  system "cat #{outfiles('ip-ua')}|cut -f 2-4|sort -k2 -k3 >ip-uas"
end

######################################################################
desc "show all combinations of ip and result"
file "ip-results" do
  system "ruby ip-result.rb"
  system "cat #{outfiles('ip-result')}|sort -k1 -k2 >ip-results"
end

######################################################################
desc "traffic by hour"
file "hours" do
  system "ruby hour.rb"
  system "cat #{outfiles('hour')}|sort -n >hours"
end

######################################################################
desc "traffic by section"
file "sections" do
  system "ruby section.rb"
  system "cat #{outfiles('section')}|sort -nr -k2 >sections"
end

######################################################################
desc "top 10 files"
file "top-files" do
  system "ruby top-file.rb"
  system "cat #{outfiles('top-file')} >top-files"
end
